import pandas as pd
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
#Jiawei Yang: jiy020@ucsd.edu
#Code Sample 2: Secondhand real-estate online trading activity visualization by State in U.S.
import bs4
import requests
# Read the pre-processed state/frequency CSV and keep only the two columns
# that the rest of the script uses.
wanted_columns = ['state_name', 'freq']
state_freq = pd.read_csv('state_freq.csv')[wanted_columns]
# Preview the first three rows (displayed when run as a notebook cell).
state_freq.head(3)
| | state_name | freq |
|---|---|---|
| 0 | Arizona | 7 |
| 1 | Arkansas | 1 |
| 2 | California | 51 |
# Web-scrape the U.S. state abbreviation table and build a lookup of
# {lower-cased full state name: two-letter abbreviation}.
abbr_holder = {}
# A timeout keeps the script from hanging indefinitely on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as the table.
res = requests.get(
    'https://www.bu.edu/brand/guidelines/editorial-style/us-state-abbreviations/',
    timeout=30,
)
res.raise_for_status()
# Name the parser explicitly so the parse does not depend on which optional
# parser libraries happen to be installed (and to avoid bs4's parser warning).
soup = bs4.BeautifulSoup(res.text, 'html.parser')
artcl = soup.find('article', {'class': 'content-area post-116 page type-page status-publish hentry'})
# Peek at the first cell of the second table row (shown when run as a notebook cell).
artcl.find_all('tr')[1].find_all('td')[0].text
for entry in artcl.find_all('tr'):
    elements = entry.find_all('td')
    # Rows with fewer than three <td> cells (e.g. a header row built from <th>)
    # have no third column to read; skip them instead of raising IndexError.
    if len(elements) < 3:
        continue
    # Strip surrounding whitespace so padded cells still match.
    full_name = elements[0].text.strip()
    abbreviation = elements[2].text.strip()
    # Abbreviations are of length 2; anything else in the third column is ignored.
    if len(abbreviation) == 2:
        abbr_holder[full_name.lower()] = abbreviation
# Match each row's abbreviation by its full state name (case-insensitive).
# Names are lower-cased, as the lookup keys are, and stripped of surrounding
# whitespace so stray padding in the CSV does not break the match.
normalized_names = state_freq['state_name'].str.strip().str.lower()
# Collect every name without an abbreviation and report them together, rather
# than failing on the first one with a bare KeyError raised from inside apply().
unmatched = normalized_names[~normalized_names.isin(list(abbr_holder))].unique().tolist()
if unmatched:
    raise KeyError('no abbreviation found for state name(s): {}'.format(unmatched))
state_freq['state_abbr'] = normalized_names.map(abbr_holder)
# Choropleth trace: one entry per state, located by its two-letter
# abbreviation and coloured by the value in the 'freq' column.
freq_dt = {
    'type': 'choropleth',
    'locations': state_freq['state_abbr'],
    'locationmode': 'USA-states',
    'colorscale': 'mint',
    'text': state_freq['state_name'],
    'z': state_freq['freq'],
    'colorbar': dict(title='bargain counts'),
}
# Restrict the map to the United States.
layout = {'geo': dict(scope='usa')}
# Assemble the figure from the single trace and render it.
choromap = go.Figure(data=[freq_dt], layout=layout)
choromap.show()
# Show the raw per-state counts as a NumPy array (displayed when run as a notebook cell).
state_freq['freq'].to_numpy()
array([ 7, 1, 51, 5, 6, 5, 2, 12, 11, 16, 1, 2, 1, 1, 1, 3, 3,
1, 5, 5, 1, 1, 1, 21, 1, 12, 23, 1])